% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
@misc{chung2014empirical,
  author        = {Chung, Junyoung and Gulcehre, Caglar and Cho, KyungHyun and Bengio, Yoshua},
  title         = {Empirical evaluation of gated recurrent neural networks on sequence modeling},
  year          = {2014},
  eprint        = {1412.3555},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1412.3555}
}

% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
@misc{zaremba2014recurrent,
  author        = {Zaremba, Wojciech and Sutskever, Ilya and Vinyals, Oriol},
  title         = {Recurrent neural network regularization},
  year          = {2014},
  eprint        = {1409.2329},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1409.2329}
}

% Conference paper (International Conference on Machine Learning, 2015).
@inproceedings{jozefowicz2015empirical,
  author    = {Jozefowicz, Rafal and Zaremba, Wojciech and Sutskever, Ilya},
  title     = {An empirical exploration of recurrent network architectures},
  booktitle = {International Conference on Machine Learning},
  year      = {2015},
  pages     = {2342--2350}
}

% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
% Accented letters are written as brace-wrapped LaTeX escapes so that classic
% BibTeX treats each as a single character.
@misc{sak2014long,
  author        = {Sak, Ha{\c{s}}im and Senior, Andrew and Beaufays, Fran{\c{c}}oise},
  title         = {Long short-term memory based recurrent neural network architectures for large vocabulary speech recognition},
  year          = {2014},
  eprint        = {1402.1128},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1402.1128}
}

% Conference paper. The booktitle is written with its proper capitalisation
% because bibliography styles print booktitle verbatim (no recasing).
@inproceedings{gal2016theoretically,
  author    = {Gal, Yarin and Ghahramani, Zoubin},
  title     = {A theoretically grounded application of dropout in recurrent neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2016},
  pages     = {1019--1027}
}

% Conference paper. The first author's family name is Shi (given name
% Xingjian); the citation key is kept unchanged so existing citations still
% resolve. The acronym LSTM is brace-protected so sentence-casing styles do not
% lowercase it, and the booktitle carries its proper capitalisation because
% styles print it verbatim.
@inproceedings{xingjian2015convolutional,
  author    = {Shi, Xingjian and Chen, Zhourong and Wang, Hao and Yeung, Dit-Yan and Wong, Wai-Kin and Woo, Wang-chun},
  title     = {Convolutional {LSTM} network: A machine learning approach for precipitation nowcasting},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2015},
  pages     = {802--810}
}

% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
% The product name is protected as one whole word; the word after the colon
% keeps its capital under standard sentence casing without extra braces.
% The trailing "and others" is the BibTeX token for a truncated author list.
% NOTE(review): "Gomez" may belong to a compound family name for the ninth
% author - confirm against the paper before changing the name split.
@misc{buchlovsky2019tf,
  author        = {Buchlovsky, Peter and Budden, David and Grewe, Dominik and Jones, Chris and Aslanides, John and Besse, Frederic and Brock, Andy and Clark, Aidan and Colmenarejo, Sergio G{\'o}mez and Pope, Aedan and others},
  title         = {{TF-Replicator}: Distributed Machine Learning for Researchers},
  year          = {2019},
  eprint        = {1902.00465},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1902.00465}
}

% Design document hosted as a GitHub pull request, not a journal article, so
% it is a misc entry with the venue text in howpublished.
% Capitalised title words are protected as whole words rather than by bracing
% single letters.
% NOTE(review): the citation key says 2019 while the year field is 2018; the
% key is kept so existing citations still resolve - confirm the year.
@misc{buchlovsky2019distribution,
  author       = {Buchlovsky, Peter and Grewe, Dominik and Gupta, Priya and Hennigan, Tom and Hseu, Jonathan and Jones, Chris and Levenberg, Josh},
  title        = {Distribution {Strategy} - {Revised} {API}},
  howpublished = {TensorFlow Community RFCs, Google / DeepMind},
  year         = {2018},
  url          = {https://github.com/tensorflow/community/pull/25}
}

% Design document hosted as a GitHub pull request, not a journal article, so
% it is a misc entry with the venue text in howpublished.
% The identifier tf.Module is protected as one whole token so that
% sentence-casing styles leave its capitalisation intact.
@misc{agarwal2019stateful,
  author       = {Agarwal, Ashish and Berthelot, David and Hennigan, Tom and Passos, Alex and Reynolds, Malcolm},
  title        = {Stateful Containers with {tf.Module}},
  howpublished = {TensorFlow Community RFCs, Google / DeepMind},
  year         = {2019},
  url          = {https://github.com/tensorflow/community/pull/56}
}

% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
% Middle initials carry their periods so styles that print full given names
% render them correctly.
@misc{saxe2013exact,
  author        = {Saxe, Andrew M. and McClelland, James L. and Ganguli, Surya},
  title         = {Exact solutions to the nonlinear dynamics of learning in deep linear neural networks},
  year          = {2013},
  eprint        = {1312.6120},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1312.6120}
}

% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
@misc{blundell2015weight,
  author        = {Blundell, Charles and Cornebise, Julien and Kavukcuoglu, Koray and Wierstra, Daan},
  title         = {Weight uncertainty in neural networks},
  year          = {2015},
  eprint        = {1505.05424},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1505.05424}
}

% arXiv preprint. The identifier is stored in eprint/archivePrefix (instead of
% being embedded in a journal field); the abstract page stays in url.
@misc{fortunato2017bayesian,
  author        = {Fortunato, Meire and Blundell, Charles and Vinyals, Oriol},
  title         = {Bayesian recurrent neural networks},
  year          = {2017},
  eprint        = {1704.02798},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/1704.02798}
}

% arXiv preprint described through eprint/archivePrefix/primaryClass.
% Author names use the unambiguous "Last, First" form; the url points at the
% abstract page for the eprint identifier given below.
@misc{kingma2014adam,
  author        = {Kingma, Diederik P. and Ba, Jimmy},
  title         = {Adam: A Method for Stochastic Optimization},
  year          = {2014},
  eprint        = {1412.6980},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/1412.6980}
}
